Celestin Apprentice 4

home *** CD-ROM | disk | FTP | other *** search

/ Celestin Apprentice 4 / Apprentice-Release4.iso / Source Code / Add-Ons / MPW / MPW rman 1.3.4 / rman.c < prev next >

Wrap

C/C++ Source or Header | 1995-11-08 | 60.2 KB | 2,086 lines | [TEXT/KAHL]

/* RosettaMan Copyright (c) 1993-1995 T.A. Phelps (phelps@cs.Berkeley.EDU) All Rights Reserved. Permission to use, copy, modify, and distribute this software and its documentation for educational, research and non-profit purposes, without fee, and without a written agreement is hereby granted, provided that the above copyright notice and the following paragraph appears in all copies. Permission to incorporate this software into commercial products may be obtained from the Office of Technology Licensing, 2150 Shattuck Avenue, Suite 510, Berkeley, CA 94704. $Header: /home/auspex/h/bair/phelps/spine/rman/RCS/rman.c,v 1.34 1995/07/02 01:01:49 phelps Exp phelps $ */ #include <stdio.h> #include <string.h> #include <ctype.h> #include <stdlib.h> /* OSF seems to need this */ #ifdef I_UNISTD #include <unistd.h> #endif /* I_UNISTD */ /*** make #define's into consts? => can't because compilers not smart enough ***/ /* maximum number of tags per line */ #define MAXTAGS 50 #define MAXTOC 500 /* minimum column for right margin */ #define MINRM 50 #define MINMID 20 /*#define MAXINDENT 15*/ /*#define HEADFOOTMATCH 20*/ #define HEADFOOTSKIP 20 #define HEADFOOTMAX 25 /* length of unique filter prefix */ #define UFP 2 #define xputchar(c) if (fcharout) putchar(c) /*enum { c_dagger=0xa7, c_plusminus=0xb1, c_bullet=0xb7 };*/ #define c_bullet '\xb7' #define c_plusminus '\xb1' #define c_dagger '\xa7' #define c_lsquote '\x60' #define c_rsquote '\x27' /* accept man pages as formatted by (10) Hewlett-Packard HP-UX, AT&T System V, SunOS, Sun Solaris, OSF/1, DEC Ultrix, SGI IRIX, Linux, FreeBSD output as (10) printable ASCII, headers only, TkMan, [tn]roff, Ensemble, SGML, HTML, LaTeX, RTF, Perl pod written March 24, 1993 bs2tk transformed into RosettaMan November 4-5, 1993 1993 2-Apr bullets, change bars, copyright symbol 5 boldface, other SGI nicks 7 skip unrecognized escape codes 10 small caps 13 underscores considered uppercase so show up in default small caps font screen out Ultrix junk 
(code getting pretty tangled now) 14 until Tk text has better tab support, replace tabs by spaces until get to next tab stop (for Ultrix) -t gives tabstop spacing 20 Solaris support (Larry Tsui) 3-Jun section subheading parsing (Per-Erik Martin) 28 hyphenated man pages in SEE ALSO show up correctly in Links (Mike Steele) 13-Jul under FILES, fully qualified path names are added to Links, but this taken out immediately because not useful 14 option to keep changebars on right (Warren Jessop) 5-Aug search for header, footer dynamically-- no need to edit or search large list of patterns 11 -m kicks in man page formatting beyond nroff backspace kludges 27 handle double digit numbers better by trying again relative to end of line 19-Sep -T gives Tk extras (otherwise ASCII only) -H gives headers only (implies -T off) 10-Oct -r reverse compiles to [tn]roff source (as Geoff Collyer's nam and fontch, but leveraging existing analysis so only addition of ~60 lines) (The code is device-driver obscure now--obfuscated C contest next.) 13 header and footer optionally available at bottom in Tk view (Marty Leisner) 19 "reflected" odd and even page headers&footers zapped 20 keep count of sections and subsections, using smaller font for larger numbers 1-Nov reverse compiles to Ensemble, except for character ranges 4 started rman rewrite for cleaner support of multiple output targets, including: plain ascii, headers only, TkMan, [nt]roff, Ensemble, SGML, HTML 5 line filtering separated from other logic despite greater sophistication, RosettaMan faster than bs2tk (!) 
28-Dec man page reference recognition (Michael Harrison) 1994 1-Jan identify descriptive lists by comparing scnt2 with s_avg 3 tail-end table of contents in HTML documents 5 -f <filter> and LaTeX output mode 24 proof-of-concept RTF output mode 26 handle man pages that don't have a header on the first page 28 parse "handwritten" man pages 22-Feb alpha version released 6-Mar various bug fixes 10 beta version released 13-Jun fixed spurious generation on <DL>'s (the existence of which was pointed out by David Sibley) 22-Jul table recognition experiment. works reasonably well, except for tables with centered headers 3 allow for off-by-one (and -two) in identification of header and footer fixed problem with recurrent/leftover text with OSF/1 bold bullets (yeesh) 12-Sep 2.0gamma released 13 check for *third* header, possibly centered, possibly after blank lines (Charles Anderson) fixed tag ranges for lines following blank lines (just \n) of pages with global indentation (Owen Rees) 19 fixed two small problems with LaTeX (^ => \^, \bullet => $\bullet$) (Neal Becker) 24 simple check for erroneously being fed roff source 26 deal with bold +- as in ksh (ugh) 30 2.0delta released 9-Oct special check for OSF to guard against section head interpreted as footer 8-Nov Perl pod output format (result still needs work, but not much) 7-Dec 2.0epsilon released (last one before final 2.0) 22 Happy Winter Solstice! 2.0 released deprecated gets() replaced (Robert Withrow) 25 TkMan module's $w.show => $t, saving about 9% in generated characters 1995 1-Jan experiment with TkMan output to take advantage of my hack to Tk text (i.e., $t insert end "text" => $t insert end "text1" tag1 "text2" tag2 ...) results => output size reduced about 25%, time reduced about 12-15% 25-Mar back to old mark command for Tk module 8-May hyphens in SEE ALSO section would confuse link-finder, so re-linebreak if necessary(!) (Greg Earle & Uri Guttman) ??-??? 
SGML output format (DTD found at long last), validated by sgmls */ /* TO DO **** output to SGML with Davenport DTD fix spurious additional newlines, as after umount Flags in /usr/man/man8/mount.8 don't give SHORTLINE if just finished bullet of bultxt, ended section head, ... other cases? make sure text following bullet is aligned correctly output to MIME? output to WinHelp? collect header and footer until hit blank line? what to do about tables? count second gap of spaces & average gap? ==> good idea but tables too variable for this to work internal, outline-like header section for HTML documents? how to put this *first*? one line look ahead to enable better parsing (item lists, et cetera) alluc (==nonlc) flag, copy curline to last line vector (works well with lookahead cache) ?? collect sundry globals into vectors (i.e., arrays and enum indexes) (if compiler has good constant propagation, then shouldn't slow access) collect scattered globals into vectors (e.g., curline[ispcnt]): array + enum curline, lastline, flags, pending, bufs+lens */ /*** tag management ***/ enum tagtype { TITLE, ITALICS, BOLD, SYMBOL, SMALLCAPS, BOLDITALICS, MANREF, MONO }; struct { int type; int first; int last; } tags[MAXTAGS+1]; int tagc=0; struct { char *text; int type; } toc[MAXTOC]; int tocc=0; /*** globals ***/ /* move all flags into an array? enum { fSubsX, fLast }; int flags[fLast]; */ int TabStops=8; int fSubsections=0; /* extract subsection titles too? */ int fChangeleft=0; /* move change bars to left */ int fChangezap=0; /* delete change bars */ int fMan=1; /* invoke agressive man page filtering? */ int fQS=0; /* squeeze out spaces (scnt and interword)? */ int fIQS=0; /* squeeze out initial spaces (controlled separately from fQS) */ int fILQS=0; /* squeeze out spaces for usual indent */ int fHeadfoot=0; /* show canonical header and footer at bottom? 
*/ int falluc=0; int fintable=0; int fTable=0; int fotable=0; int hanging=0; /* location of hanging indent (if ==0, none) */ int fSEEALSO=0; /* in SEE ALSO section? */ int fFILES=0; int fNOHY=0; /* re-linebreak so no words are hyphenated */ char manName[80]="man page"; char manSect[80]="1"; char *manTitle = MANTITLEPRINTF; char *manRef = MANREFPRINTF; char *providence = "manual page source format generated by RosettaMan"; char *anonftp = "available via anonymous ftp from ftp.cs.berkeley.edu:/ucb/people/phelps/tcltk/rman.tar.Z"; int pmode=0; /* line or paragraph groupings of text */ int linelen; /* length of result in plain[] */ int spcsqz; /* number of spaces squeezed out */ int ccnt=0; /* # of changebars */ int scnt,scnt2; /* counts of initial spaces in line */ int s_sum,s_cnt; int bs_sum, bs_cnt; int ncnt=0,oncnt=0; /* count of interline newlines */ int CurLine=1; int indent=0; /* global indentation */ int lindent=0; /* usual local indent */ int auxindent=0; /* aux indent */ int I; /* index into line/paragraph */ int fcharout=1; /* show text or not */ char *escchars=""; char lookahead='\0'; char buf[BUFSIZ]; char plain[BUFSIZ]; /* current text line with control characters stripped out */ char hitxt[BUFSIZ]; /* highlighted text (available at BEGIN<highlight> signal */ char header[BUFSIZ]=""; /* complete line */ char footer[BUFSIZ]=""; char header2[BUFSIZ]=""; /* SGIs have two lines of headers and footers */ char header3[BUFSIZ]=""; /* GNU and some others have a third! 
*/ char footer2[BUFSIZ]=""; int Psect=0, Psub=0, Pbp=0, Pbt=0, Pb=0; int fIP=0; /*** utility functions ***/ void addtag(int type, int first, int last) { if (tagc<MAXTAGS) { tags[tagc].type = type; tags[tagc].first = first; tags[tagc].last = last; tagc++; } } /* collect all saves to string table one one place, so that if decide to go with string table instead of multiple malloc, it's easy (probably few enough malloc's that more sophistication is unnecessary) */ void addtoc(char *text, int type, int endline) { char *r; if (tocc<MAXTOC) { r = malloc(strlen(text)+1); strcpy(r,text); toc[tocc].text = r; toc[tocc].type = type; tocc++; } } char phrase[BUFSIZ]; /* first "phrase" (space of >=3 spaces) */ int phraselen; void filterline(char *buf, char *plain) { char *p,*q,*r; char pp='\0'; char *ph; int ip,iq; int i; int hl=-1, hl2=-1; int iscnt=0; /* interword space count */ enum tagtype tag; ph=phrase; phraselen=0; scnt=scnt2=0; s_sum=s_cnt=0; bs_sum=bs_cnt=0; ccnt=0; spcsqz=0; /* strip only certain \x1b's and only at very beginning of line */ for (p=buf; *p=='\x1b'&& (p[1]=='8'||p[1]=='9'); p+=2) /* nop */; /*** tabs => spaces ***/ for (iq=0, /* p=buf,-- p set above!*/ q=plain; *p; p++) { if (*p=='\t') { do { *q++=' '; iq++; } while (iq%TabStops); } else { *q++=*p; iq++; if (*p=='\b') iq-=2; } } *q='\0'; /*** spaces and change bars ***/ for (scnt=0,p=plain; *p==' '; p++) scnt++; /* initial space count */ if (scnt) pp=' '; q--; if (fChangeleft || fChangezap) for (; q-40>plain && *q=='|'; q--) /* change bars */ if (fChangeleft) ccnt++; if (q!=&plain[scnt-1]) /* trailing */ for (; *q==' '; q--) /* nop */; q[1]='\0'; /* set I for tags below */ if (indent>=0 && scnt>=indent) scnt-=indent; if (!pmode && !fIQS) { if (fChangeleft) I+=(scnt>ccnt)?scnt:ccnt; else I+=scnt; } /*** tags and filler spaces ***/ iq=0; falluc=1; for (q=plain; *p; pp=*p,p++) { iscnt=0; if (*p==' ') { for (r=p; *r==' '; r++) { iscnt++; spcsqz++; } s_sum+=iscnt; s_cnt++; if (iscnt>1 && !scnt2 && *p==' ') 
scnt2=iscnt; if (iscnt>2) { bs_cnt++; bs_sum+=iscnt; } /* keep track of large gaps */ iscnt--; /* leave last space for tail portion of loop */ if (fQS && iscnt<3) { p=r-1; iscnt=0; } /* reduce strings of <3 spaces to 1 */ /* else if (fQS && iscnt>=3) { replace with tab? } */ else { for (i=0; i<iscnt; i++) { p++; *q++=' '; } } pp=' '; } /* need to go through if chain for closing off annotations */ /** backspace-related filtering **/ /* else */ if (*p=='\b' && p[1]=='_' && q>plain && q[-1]=='+') { /* bold plus/minus(!) */ q[-1]=c_plusminus; while (*p=='\b' && p[1]=='_') p+=2; continue; } else if ((*p=='_' && p[1]=='\b' && p[2]!='_' && p[3]!='\b') || (*p=='\b' && p[1]=='_')) { /* italics */ /* start tag only if not already in one */ if (hl==-1) { hl=I+iq; tag=ITALICS; } p+=2; } else if (*p=='_' && p[2]==p[4] && p[1]=='\b' && p[3]=='\b' && p[2]!='_') { /* bold italics (Solaris is BRAIN DEAD!) */ for (p+=2; *p==p[2] && p[1]=='\b';) p+=2; if (hl==-1) { hl=I+iq; tag=BOLDITALICS; } } else if (*p==p[2] && p[1]=='\b') { /* boldface */ while (*p==p[2] && p[1]=='\b') p+=2; if (hl==-1) { hl=I+iq; tag=BOLD; } } else if (p[1]=='\b' && ((*p=='o' && p[2]=='+') || (*p=='+' && p[2]=='o')) ) { /* bullets */ p+=2; while (p[1]=='\b' && /* bold bullets(!) */ (*p=='o' || p[2]=='+') ) p+=2; *q++=c_bullet; iq++; continue; } else if (*p=='\b' && p>plain && p[-1]=='o' && p[1]=='+') { /* OSF bullets */ while (*p=='\b' && p[1]=='+') p+=2; /* bold bullets(!) 
*/ q[-1]=c_bullet; p--; continue; } else if (p[1]=='\b' && *p=='+' && p[2]=='_') { /* plus/minus */ p+=2; *q++=c_plusminus; iq++; continue; } else if (p[1]=='\b' && *p=='|' && p[2]=='-') { /* dagger */ *q++=c_dagger; iq++; p+=2; continue; } else if (*p=='\b') { /* supress unattended backspaces */ continue; } else if (*p=='\x1b' /*&& (p[1]=='9'||p[1]=='8')*/) { p++; if (*p=='[') { p++; if (*p=='1' && hl==-1) { tags[MAXTAGS].first=I+iq; tags[MAXTAGS].type=BOLD; } else if (*p=='0' && hl2==-1 && tags[MAXTAGS].first<I+iq) { /* doesn't catch tag if spans line */ addtag(tags[MAXTAGS].type, tags[MAXTAGS].first, I+iq); } p++; /* following 'm' (why?) gobbled in overarching for */ } /* skip unrecognized escape codes */ continue; } else if ((isupper(*p) /*|| *p=='_'*/ || *p=='&') && (hl>=0 || isupper(p[1]) || p[1]=='&')) { if (hl==-1) { hl=I+iq; tag=SMALLCAPS; } } else { /* end of tag, one way or another */ /* collect tags in this pass, interspersed later if need be */ /* can't handle overlapping tags */ if (hl>=0) { if (hl2==-1) addtag(tag, hl, I+iq); hl=-1; } } /** non-backspace related filtering **/ /* case statement here in place of if chain? */ /* Tk 3.x's text widget tabs too crazy if (*p==' ' && strncmp(" ",p,5)==0) { xputchar('\t'); i+=5-1; ci++; continue; } else */ /* copyright symbol: too much work for so little if (p[i]=='o' && (strncmp("opyright (C) 19",&p[i],15)==0 || strncmp("opyright (c) 19",&p[i],15)==0)) { printf("opyright \xd3 19"); addtag(SYMBOL, ci+9, ci+10); i+=15-1; ci+=13; continue; } else */ if (*p=='(' && q>plain && (isalnum(q[-1])||strchr("._-+",q[-1])!=NULL) && strchr("123456789olnp",p[1])!=NULL /* && p[1]!='s' && p[-1]!='`' && p[-1]!='\'' && p[-1]!='"'*/ ) { hl2=I+iq; for (r=q-1; r>=plain && (isalnum(*r)||strchr("._-+",*r)!=NULL); r--) hl2--; /* else ref to a function? 
*/ /* maybe save position of opening paren so don't highlight it later */ } else if (*p==')' && hl2!=-1) { /* don't overlap tags on man page referenes */ while (tagc>0 && tags[tagc-1].last>hl2) tagc--; addtag(MANREF, hl2, I+iq+1); hl2=-1; } else if (hl2!=-1) { if (!isalnum(*p)) hl2=-1; } if (!*p) break; /* safety check */ *q++=*p; falluc = falluc && (isupper(*p) || isspace(*p) || *p=='-' || *p=='&' || *p=='_'); if (!scnt2) { *ph++=*p; phraselen++; } iq+=iscnt+1; } if (hl>=0) addtag(tag, hl, I+iq); *q=*ph='\0'; linelen=iq+ccnt; } void lowerline(char *p, char *q) { while (*p) *q++=tolower(*p++); *q='\0'; } /* * OUTPUT FORMATS * *** break these out so can selectively include them in the binary *** * *** does this save significant space? *** */ enum command { BEGINDOC, ENDDOC, BEGINBODY, ENDBODY, BEGINHEADER, ENDHEADER, BEGINFOOTER, ENDFOOTER, SHORTLINE, BEGINSECTION, ENDSECTION, BEGINSUBSECTION, ENDSUBSECTION, BEGINSECTHEAD, ENDSECTHEAD, BEGINSUBSECTHEAD, ENDSUBSECTHEAD, BEGINBOLD, ENDBOLD, BEGINITALICS, ENDITALICS, BEGINMANREF, ENDMANREF, BEGINSC, ENDSC, BEGINBOLDITALICS, ENDBOLDITALICS, BEGINY, ENDY, BEGINBULPAIR, ENDBULPAIR, BEGINBULLET, ENDBULLET, BEGINBULTXT, ENDBULTXT, CHARLQUOTE, CHARRQUOTE, CHARPERIOD, CHARDAGGER, CHARBULLET, CHARPLUSMINUS, CHARLSQUOTE, CHARRSQUOTE, CHARGT, CHARLT, CHARAMP, CHANGEBAR, CHARBACKSLASH, CHARDASH, BEGINLINE, ENDLINE, BEGINTABLELINE, ENDTABLELINE, BEGINTABLE, ENDTABLE }; void (*fn)(enum command); enum command prevcmd = BEGINDOC; /* * TkMan */ void manStrip(char *s) { if (*s) { printf("$t insert end {%s} sc \\n\n",s); CurLine++; } } void TkMan(enum command cmd) { static char *headfoot = "Header and Footer"; static int skip=0; static int markcnt=0; static char *bads = "\"[]$"; int i; /* invariant: always ready to insert text */ switch (cmd) { case BEGINDOC: I=0; CurLine=1; escchars = bads; printf("$t insert end \""); break; case ENDDOC: if (fHeadfoot) { /* grr, should have +mark syntax for Tk text widget! 
printf("\\n\\n\" {} \"%s\\n\" {+headfoot h2}\n",headfoot); */ printf("\\n\\n\" {} \"%s\\n\" h2\n",headfoot); printf("$t mark set headfoot %d.0\n"); CurLine++; manStrip(header); manStrip(header2); manStrip(header3); manStrip(footer); manStrip(footer2); } else printf("\"\n"); break; case BEGINLINE: /*I=0; -- need to do this at end of line so set for filterline() */ /* nothing to do at start of line except catch up on newlines */ for (i=0; i<ncnt; i++) printf("\\n"); CurLine+=ncnt; /* if ((CurLine&0x1f)==0x1f) printf("\" {}\n$t insert end \"");*/ break; case ENDLINE: if (!skip) { printf("\\n"); if (fSEEALSO) { printf("\"\n"); printf("append manx(links) {%s,}\n", plain); printf("$t insert end \""); } } tagc=0; skip=0; CurLine++; I=0; if ((CurLine&0x3f)==0x3f) printf("\" {}\nupdate idletasks\n$t insert end \""); break; case ENDSECTHEAD: /* printf("\\n\" {h2 +js%d}\n$t insert end \"",++markcnt); skip=1;*/ printf("\\n\" h2\n"); printf("$t mark set js%d %d.0\n", ++markcnt, CurLine); tagc=0; printf("$t insert end \""); skip=1; break; case ENDSUBSECTHEAD: /* printf("\\n\" {+jss%d}\n$t insert end \"",++markcnt); skip=1;*/ printf("\\n\"\n"); /* add h3? 
*/ printf("$t mark set jss%d %d.0\n", ++markcnt, CurLine); tagc=0; printf("$t insert end \""); skip=1; break; case BEGINTABLELINE: break; case ENDTABLELINE: printf("\" tt \""); /* addtag(MONO, 0, I);*/ break; case CHARLQUOTE: case CHARRQUOTE: putchar('\\'); putchar('"'); I++; break; case CHARLSQUOTE: putchar('`'); I++; break; case CHARRSQUOTE: putchar('\''); I++; break; case CHARPERIOD: putchar('.'); I++; break; case CHARDASH: putchar('-'); I++; break; case CHARLT: putchar('<'); I++; break; case CHARGT: putchar('>'); I++; break; case CHARAMP: putchar('&'); I++; break; case CHARBACKSLASH: printf("\\\\"); I++; break; case CHARDAGGER: putchar(c_dagger); I++; break; case CHARBULLET: printf("\" {} %c symbol \"",c_bullet); I++; break; case CHARPLUSMINUS: putchar(c_plusminus); I++; break; case BEGINSECTHEAD: case BEGINSUBSECTHEAD: tagc=0; /* section and subsection formatting controlled descriptively */ /* no break;*/ case BEGINBOLD: case BEGINITALICS: case BEGINBOLDITALICS: case BEGINY: case BEGINSC: case BEGINMANREF: /* end text, begin attributed text */ printf("\" {} \""); break; /* rely on the fact that no more than one tag per range of text */ case ENDBOLD: printf("\" b \""); break; case ENDITALICS: printf("\" i \""); break; case ENDBOLDITALICS: printf("\" bi \""); break; case ENDY: printf("\" symbol \""); break; case ENDSC: printf("\" sc \""); break; case ENDMANREF: printf("\" manref \""); break; /* presentation attributes dealt with at end of line */ case BEGINBODY: case ENDBODY: case SHORTLINE: case BEGINBULPAIR: case ENDBULPAIR: case BEGINBULLET: case ENDBULLET: case BEGINBULTXT: case ENDBULTXT: case BEGINSECTION: case ENDSECTION: case BEGINSUBSECTION: case ENDSUBSECTION: case BEGINHEADER: case ENDHEADER: case BEGINFOOTER: case ENDFOOTER: case BEGINTABLE: case ENDTABLE: /* no action */ break; } } /* * ASCII */ void ASCII(enum command cmd) { int i; switch (cmd) { case CHARRQUOTE: case CHARLQUOTE: putchar('"'); break; case CHARLSQUOTE: case CHARRSQUOTE: 
putchar('\''); break; case CHARPERIOD: putchar('.'); break; case CHARDASH: putchar('-'); break; case CHARLT: putchar('<'); break; case CHARAMP: putchar('&'); break; case CHARBACKSLASH: putchar('\\'); break; case CHARGT: putchar('>'); break; case CHARDAGGER: putchar('+'); break; case CHARBULLET: putchar('*'); break; case CHARPLUSMINUS: printf("+-"); break; case CHANGEBAR: putchar('|'); break; case BEGINLINE: for (i=0; i<ncnt; i++) putchar('\n'); break; case ENDLINE: putchar('\n'); CurLine++; tagc=0; break; case BEGINDOC: case ENDDOC: case BEGINBODY: case ENDBODY: case BEGINHEADER: case ENDHEADER: case BEGINFOOTER: case ENDFOOTER: case BEGINSECTION: case ENDSECTION: case BEGINSECTHEAD: case ENDSECTHEAD: case BEGINSUBSECTHEAD: case ENDSUBSECTHEAD: case BEGINBULPAIR: case ENDBULPAIR: case BEGINBULLET: case ENDBULLET: case BEGINBULTXT: case ENDBULTXT: case BEGINSUBSECTION: case ENDSUBSECTION: case SHORTLINE: case BEGINTABLE: case ENDTABLE: case BEGINTABLELINE: case ENDTABLELINE: case BEGINBOLD: case ENDBOLD: case BEGINITALICS: case ENDITALICS: case BEGINMANREF: case ENDMANREF: case BEGINBOLDITALICS: case ENDBOLDITALICS: case BEGINY: case ENDY: case BEGINSC: case ENDSC: /* nothing */ break; } } /* * Perl 5 pod ("plain old documentation") */ void pod(enum command cmd) { static int curindent=0; int i; if (hanging==-1) { if (curindent) hanging=curindent; else hanging=5; } if (cmd==BEGINBULPAIR) { if (curindent && hanging!=curindent) printf("\n=back\n\n"); if (hanging!=curindent) printf("\n=over %d\n\n",hanging); curindent=hanging; } else if (cmd==ENDBULPAIR) { /* nothing--wait until next command */ } else if (cmd==BEGINLINE && !scnt) { if (curindent) printf("\n=back\n\n"); curindent=0; } else if (cmd==BEGINBODY) { if (curindent) { printf("\n=back\n\n"); curindent=0; auxindent=0; } } /* case BEGINBULPAIR: printf("=over %d\n\n", hanging); break; case ENDBULPAIR: printf("\n=back\n\n"); break; */ switch (cmd) { case BEGINDOC: I=0; break; case CHARRQUOTE: case CHARLQUOTE: 
putchar('"'); break; case CHARLSQUOTE: case CHARRSQUOTE: putchar('\''); break; case CHARPERIOD: putchar('.'); break; case CHARDASH: putchar('-'); break; case CHARLT: putchar('<'); break; case CHARAMP: putchar('&'); break; case CHARBACKSLASH: putchar('\\'); break; case CHARGT: putchar('>'); break; case CHARDAGGER: putchar('+'); break; case CHARPLUSMINUS: printf("+-"); break; case CHANGEBAR: putchar('|'); break; case CHARBULLET: putchar('*'); break; case BEGINLINE: for (i=0; i<ncnt; i++) putchar('\n'); CurLine+=ncnt; break; case ENDLINE: putchar('\n'); CurLine++; tagc=0; I=0; break; case BEGINSECTHEAD: printf("=head1 "); break; case BEGINSUBSECTHEAD: printf("=head2 "); break; case ENDSECTHEAD: case ENDSUBSECTHEAD: printf("\n"); break; case BEGINBOLD: printf("B<"); break; case BEGINITALICS: printf("I<"); break; case BEGINMANREF: printf("L<"); break; case ENDBOLD: case ENDITALICS: case ENDMANREF: printf(">"); break; case BEGINBULLET: printf("\n=item "); break; case ENDBULLET: printf("\n\n"); fcharout=0; break; case BEGINBULTXT: fcharout=1; auxindent=hanging; break; case ENDBULTXT: auxindent=0; break; case ENDDOC: case BEGINBODY: case ENDBODY: case BEGINHEADER: case ENDHEADER: case BEGINFOOTER: case ENDFOOTER: case BEGINSECTION: case ENDSECTION: case BEGINSUBSECTION: case ENDSUBSECTION: case SHORTLINE: case BEGINTABLE: case ENDTABLE: case BEGINTABLELINE: case ENDTABLELINE: case BEGINBOLDITALICS: case ENDBOLDITALICS: case BEGINY: case ENDY: case BEGINSC: case ENDSC: /* nothing */ break; } } void Sections(enum command cmd) { switch (cmd) { case ENDSECTHEAD: case ENDSUBSECTHEAD: putchar('\n'); case BEGINDOC: fcharout=0; break; case BEGINSUBSECTHEAD: printf(" "); /* no break */ case BEGINSECTHEAD: fcharout=1; break; case CHARRQUOTE: case CHARLQUOTE: xputchar('"'); break; case CHARLSQUOTE: case CHARRSQUOTE: xputchar('\''); break; case BEGINTABLE: case ENDTABLE: case BEGINTABLELINE: case ENDTABLELINE: case CHARPERIOD: xputchar('.'); break; case CHARDASH: xputchar('-'); break; 
case CHARBACKSLASH: xputchar('\\'); break; case CHARLT: xputchar('<'); break; case CHARGT: xputchar('>'); break; case CHARAMP: xputchar('&'); break; case CHARDAGGER: xputchar('+'); break; case CHARBULLET: xputchar('*'); break; case CHARPLUSMINUS: xputchar('+'); xputchar('-'); break; default: /* nothing */ break; } } void Roff(enum command cmd) { int i; switch (cmd) { case BEGINDOC: I=1; printf(".TH %s %s \"generated by RosettaMan\" UCB\n",manName,manSect); printf(".\\\" %s,\n",providence); printf(".\\\" %s\n",anonftp); CurLine=1; break; case BEGINBODY: /*printf(".LP\n");*/ break; case BEGINSECTHEAD: printf(".SH "); break; case BEGINSUBSECTHEAD:printf(".SS "); break; case BEGINBULPAIR: printf(".IP "); break; case SHORTLINE: printf("\n.br"); break; case BEGINBOLD: printf("\\fB"); break; /* \n.B -- grr! */ case ENDBOLD: printf("\\fR"); break; /* putchar('\n'); */ case BEGINITALICS: printf("\\fI"); break; case ENDITALICS: printf("\\fR"); break; case BEGINBOLDITALICS:printf("\\f4"); break; case ENDBOLDITALICS: printf("\\fR"); break; case CHARLQUOTE: printf("\\*(rq"); break; case CHARRQUOTE: printf("\\*(lq"); break; case CHARLSQUOTE: case CHARRSQUOTE: putchar('\''); break; case CHARPERIOD: if (I==1) printf("\\&"); putchar('.'); I++; break; case CHARDASH: printf("\\-"); break; case CHARLT: putchar('<'); break; case CHARGT: putchar('>'); break; case CHARAMP: putchar('&'); break; case CHARBULLET: printf("\\(bu"); break; case CHARDAGGER: printf("\\(dg"); break; case CHARPLUSMINUS: printf("\\(+-"); break; case CHANGEBAR: putchar('|'); break; case CHARBACKSLASH: printf("\\\\"); break; /* correct? 
*/ case BEGINLINE: for (i=0; i<ncnt; i++) putchar('\n'); break; case BEGINBULLET: putchar('"'); break; case ENDBULLET: printf("\"\n"); break; case ENDLINE: tagc=0; CurLine++; I=1; /* no break */ case ENDSUBSECTHEAD: case ENDSECTHEAD: case ENDDOC: putchar('\n'); break; case ENDBODY: case ENDBULPAIR: case BEGINBULTXT: case ENDBULTXT: case BEGINSECTION: case ENDSECTION: case BEGINSUBSECTION: case ENDSUBSECTION: case BEGINY: case ENDY: case BEGINSC: case ENDSC: case BEGINTABLE: case ENDTABLE: case BEGINTABLELINE: case ENDTABLELINE: case BEGINHEADER: case ENDHEADER: case BEGINFOOTER: case ENDFOOTER: case BEGINMANREF: case ENDMANREF: /* nothing */ break; } } /* * Ensemble */ void EnsembleDumpTags() { int i,j,tag; int fI=0, fB=0, fH=0; if (!tagc) return; printf("}{}{"); /* header */ /* italics */ for (i=0; i<tagc; i++) { tag = tags[i].type; if (tag==ITALICS||tag==BOLDITALICS) { if (!fI) {printf("ITALIC=("); fI=1;} printf("(%d,%d,[T])", tags[i].first, tags[i].last); } } if (fI) printf(")"); /* bold */ for (i=0; i<tagc; i++) { tag = tags[i].type; if (tag==BOLD||tag==BOLDITALICS) { if (!fB) {printf(",BOLD=("); fB=1;} printf("(%d,%d,[T])", tags[i].first, tags[i].last); } } if (fB) printf(")"); /* man ref */ /* for (i=0; i<tagc; i++) { tag = tags[i].type; if (tag==MANREF) { if (!fH) {printf(",HYPER=("); fH=1;} printf("(%d,%d,[???])", tags[i].first, tags[i].last); } } if (fH) printf(")"); */ /* printf("}"); /* trailer */ tagc=0; } void Ensemble(enum command cmd) { switch (cmd) { case BEGINDOC: I=0; printf("DOCUMENT MANPAGE\n<MANPAGE>\n"); escchars = "{}\\"; break; case ENDDOC: printf("</MANPAGE>\n"); break; case BEGINBODY: printf("<SUBSECTIONBODY><BODY>{"); break; case ENDBODY: CurLine++; EnsembleDumpTags(); printf("}</BODY></SUBSECTIONBODY>\n"); tagc=0; break; case BEGINSECTION: printf("<SECTION>"); break; case ENDSECTION: printf("</SECTION>\n"); break; case BEGINSECTHEAD: printf("<SECTHEAD>{"); break; case ENDSECTHEAD: tagc=0; I=0; printf("}</SECTHEAD>\n"); break; case 
BEGINSUBSECTHEAD: printf("<SUBSECTHEAD>{"); break; case ENDSUBSECTHEAD: tagc=0; I=0; printf("}</SUBSECTHEAD>\n"); break; case BEGINBULPAIR: printf("<SUBSECTIONBODY><LISTELEMENT>"); break; case ENDBULPAIR: printf("</LISTELEMENT></SUBSECTIONBODY>\n"); break; case BEGINBULLET: printf("<BULLET>{"); break; case ENDBULLET: tagc=0; I=0; printf("}</BULLET>"); break; case BEGINBULTXT: printf("<BULLETTEXT>{"); break; case ENDBULTXT: EnsembleDumpTags(); CurLine++; printf("}</BULLETTEXT>"); break; case BEGINSUBSECTION: printf("<SUBSECTIONBODY><SUBSECTION>\n"); break; case ENDSUBSECTION: printf("</SUBSECTION></SUBSECTIONBODY>\n"); break; case SHORTLINE: /*poppush(prevcmd);*/ break; case CHARRQUOTE: case CHARLQUOTE: putchar('"'); I++; break; case CHARLSQUOTE: case CHARRSQUOTE: putchar('\''); break; case CHARPERIOD: putchar('.'); I++; break; case CHARDASH: putchar('-'); I++; break; case CHARBACKSLASH: putchar('\\'); I++; break; case CHARLT: putchar('<'); I++; break; case CHARGT: putchar('>'); I++; break; case CHARAMP: putchar('&'); I++; break; case CHARBULLET: printf("\\(bu"); I++; break; case CHARDAGGER: printf("\\(dg"); I++; break; case CHARPLUSMINUS: printf("\\(+-"); I++; break; case CHANGEBAR: /* maybe something later */ case BEGINLINE: case ENDLINE: case BEGINY: case ENDY: case BEGINHEADER: case ENDHEADER: case BEGINFOOTER: case ENDFOOTER: case BEGINBOLD: case ENDBOLD: case BEGINITALICS: case ENDITALICS: case BEGINBOLDITALICS: case ENDBOLDITALICS: case BEGINSC: case ENDSC: case BEGINTABLE: case ENDTABLE: case BEGINTABLELINE: case ENDTABLELINE: case BEGINMANREF: case ENDMANREF: /* easy strike for hypertext--want to dynamically generate, though */ /* nothing */ break; } } /* * SGML */ /* same as HTML but just has man page-specific DTD */ /* use Davenport man DTD */ void SGML(enum command cmd) { fprintf(stderr, "SGML format needs a DTD\n"); exit(0); /* when get DTD, copy HTML decoding and just change tags */ } /* * HTML */ void HTML(enum command cmd) { static int pre=0; int i; 
int lasttoc; char *p, *p0; /* static char *bads = "\\<>";*/ /* always respond to these signals */ switch (cmd) { case CHARLQUOTE: printf("""); break; case CHARRQUOTE: printf("""); break; case CHARLSQUOTE: putchar('`'); break; case CHARRSQUOTE: putchar('\''); break; case CHARPERIOD: putchar('.'); break; case CHARDASH: putchar('-'); break; case CHARBACKSLASH: putchar('\\'); break; case CHARGT: printf(">"); break; case CHARLT: printf("<"); break; case CHARAMP: printf("&"); break; case CHARBULLET: putchar(c_bullet); break; case CHARDAGGER: putchar(c_dagger); break; case CHARPLUSMINUS: putchar(c_plusminus); break; default: break; } /* while in pre mode... */ if (pre) { switch (cmd) { case ENDLINE: I=0; tagc=0; CurLine++; if (!pmode && scnt) printf("<BR>\n"); break; case ENDTABLE: printf("</pre><br>\n"); pre=0; fQS=fIQS=pmode=1; break; default: /* nothing */ break; } return; } /* usual operation */ switch (cmd) { case BEGINDOC: /* escchars = bads;*/ printf("\n",providence); printf("\n",anonftp); printf("<HTML>\n<HEADER>\n"); /* printf("<ISINDEX>\n");*/ /* better title possible? */ printf("<TITLE>"); printf(manTitle, manName, manSect); printf("</TITLE>\n"); printf("</HEADER>\n<BODY>\n"); printf("<A HREF=\"#toc\">Table of Contents</A><P>\n"); I=0; break; case ENDDOC: /* header and footer wanted? 
*/ printf("<P>\n"); if (fHeadfoot) { printf("<HR>\n"); if (*header) printf("%s\n",header); if (*header2) printf("<BR>%s\n",header2); if (*header3) printf("<BR>%s\n",header3); if (*footer) printf("<BR>%s\n",footer); if (*footer2) printf("<BR>%s\n",footer2); } printf("\n<HR><P>\n"); printf("<A NAME=\"toc\"><B>Table of Contents</B></A><P>\n"); printf("<UL>\n"); for (i=0, lasttoc=BEGINSECTION; i<tocc; lasttoc=toc[i].type, i++) { if (lasttoc!=toc[i].type) { if (toc[i].type==BEGINSUBSECTION) printf("<UL>\n"); else printf("</UL>\n"); } printf("<LI><A NAME=\"toc%d\" HREF=\"#sect%d\">%s</A></LI>\n", i, i, toc[i].text); } if (lasttoc==BEGINSUBSECTION) printf("</UL>"); printf("</UL>\n"); /* printf( "<HR><I>conversion to HTML by RosettaMan " "available via <A HREF=\"ftp://ftp.cs.berkeley.edu:/ucb/people/phelps/tcltk/rman.tar.Z\">" "anonymous ftp</A></I>\n" ); */ /* printf("<ADDRESS>phelps@cs.berkeley.edu</ADDRESS>\n");*/ printf("</BODY></HTML>\n"); break; case BEGINBODY: break; case ENDBODY: break; case BEGINSECTION: break; case ENDSECTION: break; case BEGINSECTHEAD: printf("\n<A NAME=\"sect%d\" HREF=\"#toc%d\"><H2>", tocc, tocc); break; case ENDSECTHEAD: printf("</H2></A>\n"); /* useful extraction from files, environment? 
*/ break; case BEGINSUBSECTHEAD: printf("\n<A NAME=\"sect%d\" HREF=\"#toc%d\"><H3>", tocc, tocc); break; case ENDSUBSECTHEAD: printf("</H3></A>\n"); break; case BEGINSUBSECTION: break; case ENDSUBSECTION: break; case BEGINBULPAIR: printf("<dl>\n"); break; case ENDBULPAIR: printf("</dl>\n"); break; case BEGINBULLET: printf("<dt>"); break; case ENDBULLET: break; case BEGINLINE: if (ncnt) printf("<P>\n"); break; case ENDLINE: I=0; tagc=0; CurLine++; if (!pmode && scnt) printf("<BR>\n"); break; /* case ENDLINE: I=0; tagc=0; putchar('\n'); break;*/ case BEGINTABLE: printf("<br><pre>\n"); pre=1; fQS=fIQS=pmode=0; break; case ENDTABLE: printf("</pre><br>\n"); pre=0; fQS=fIQS=pmode=1; break; case SHORTLINE: if (!fIP) printf("<BR>\n"); break; case BEGINBULTXT: printf("<dd>"); break; case ENDBULTXT: printf("</dd>\n"); break; /* could use a new list type */ case BEGINBOLD: printf("<B>"); break; case ENDBOLD: printf("</B>"); break; case BEGINITALICS: printf("<I>"); break; case ENDITALICS: printf("</I>"); break; case BEGINBOLDITALICS:printf("<CODE>"); break; case ENDBOLDITALICS: printf("</CODE>"); break; case BEGINMANREF: for (p=hitxt; *p && *p!='('; p++) /* empty */; *p++='\0'; p0=p; for (; *p && *p!=')'; p++) /* empty */; *p='\0'; printf("<A HREF=\""); printf(manRef, hitxt, p0); printf("\">"); break; case ENDMANREF: printf("</A>"); break; case BEGINSC: case ENDSC: case BEGINY: case ENDY: case BEGINHEADER: case ENDHEADER: case BEGINFOOTER: case ENDFOOTER: case BEGINTABLELINE: case ENDTABLELINE: case CHANGEBAR: default: /* nothing */ break; } } /* * LaTeX */ void LaTeX(enum command cmd) { int i; char *p; static char *bads = "$&%#_{}^"; /* and more to come */ switch (cmd) { case BEGINDOC: escchars = bads; printf("%% %s,\n", providence); printf("%% %s\n\n", anonftp); /* definitions */ printf( "\\documentstyle{article}\n" "\\def\\thefootnote{\\fnsymbol{footnote}}\n" "\\begin{document}\n" ); I=0; break; case ENDDOC: /* header and footer wanted? 
*/ printf("\n\\end{document}\n"); break; case BEGINBODY: break; case ENDBODY: break; case BEGINSECTION: break; case ENDSECTION: break; case BEGINSECTHEAD: printf("\\section{"); tagc=0; break; case ENDSECTHEAD: printf("}"); /* if (CurLine==1) printf("\\footnote{" "\\it conversion to \\LaTeX\ format by RosettaMan " "available via anonymous ftp from {\\tt ftp.berkeley.edu:/ucb/people/phelps/tcltk}}" ); */ /* useful extraction from files, environment? */ printf("\n"); break; case BEGINSUBSECTHEAD:printf("\\subsection{"); break; case ENDSUBSECTHEAD: printf("}"); break; case BEGINSUBSECTION: break; case ENDSUBSECTION: break; case BEGINBULPAIR: printf("\\begin{itemize}\n"); break; case ENDBULPAIR: printf("\\end{itemize}\n"); break; case BEGINBULLET: printf("\\item ["); break; case ENDBULLET: printf("] "); break; case BEGINLINE: if (ncnt) printf("\n\n"); break; case ENDLINE: I=0; tagc=0; /*putchar('\n');*/ CurLine++; break; case BEGINTABLE: printf("\\begin{verbatim}\n"); break; case ENDTABLE: printf("\\end{verbatim}\n"); break; case SHORTLINE: if (!fIP) printf("\n\n"); break; case BEGINBULTXT: break; case ENDBULTXT: putchar('\n'); break; case CHARLQUOTE: printf("``"); break; case CHARRQUOTE: printf("''"); break; case CHARLSQUOTE: putchar('`'); break; case CHARRSQUOTE: putchar('\''); break; case CHARPERIOD: putchar('.'); break; case CHARDASH: putchar('-'); break; case CHARBACKSLASH: printf("$\\backslash$"); break; case CHARGT: printf("$>$"); break; case CHARLT: printf("$<$"); break; case CHARAMP: printf("\\&"); break; case CHARBULLET: printf("$\\bullet$ "); break; case CHARDAGGER: printf("\\dag "); break; case CHARPLUSMINUS: printf("\\pm "); break; case BEGINBOLD: printf("{\\bf "); break; case BEGINSC: printf("{\\sc "); break; case BEGINITALICS: printf("{\\it "); break; case BEGINBOLDITALICS:printf("{\\bf\\it "); break; case BEGINMANREF: printf("{\\sf "); break; case ENDBOLD: case ENDSC: case ENDITALICS: case ENDBOLDITALICS: case ENDMANREF: putchar('}'); break; case BEGINY: 
case ENDY: case BEGINHEADER: case ENDHEADER: case BEGINFOOTER: case ENDFOOTER: case BEGINTABLELINE: case ENDTABLELINE: case CHANGEBAR: /* nothing */ break; } } /* * Rich Text Format (RTF) */ /* RTF could use more work */ void RTF(enum command cmd) { int i; char *p; static char *bads = "{}"; switch (cmd) { case BEGINDOC: escchars = bads; /* definitions */ printf( /* fonts */ "{\\rtf1\\deff2 {\\fonttbl" "{\\f20\\froman Times;}{\\f150\\fnil I Times Italic;}" "{\\f151\\fnil B Times Bold;}{\\f152\\fnil BI Times BoldItalic;}" "{\\f22\\fmodern Courier;}{\\f23\\ftech Symbol;}" "{\\f135\\fnil I Courier Oblique;}{\\f136\\fnil B Courier Bold;}{\\f137\\fnil BI Courier BoldOblique;}" "{\\f138\\fnil I Helvetica Oblique;}{\\f139\\fnil B Helvetica Bold;}}" "\n" /* style sheets */ "{\\stylesheet{\\li720\\sa120 \\f20 \\sbasedon222\\snext0 Normal;}" "{\\s2\\sb200\\sa120 \\b\\f3\\fs20 \\sbasedon0\\snext2 section head;}" "{\\s3\\li180\\sa120 \\b\\f20 \\sbasedon0\\snext3 subsection head;}" "{\\s4\\fi-1440\\li2160\\sa240\\tx2160 \\f20 \\sbasedon0\\snext4 detailed list;}}" "\n" /* more header to come--do undefined values default to nice values? */ ); I=0; break; case ENDDOC: /* header and footer wanted? */ printf("\\par{\\f150 %s,\n%s}", providence, anonftp); printf("}\n"); break; case BEGINBODY: break; case ENDBODY: CurLine++; printf("\\par\n"); tagc=0; break; case BEGINSECTION: break; case ENDSECTION: printf("\n\\par\n"); break; case BEGINSECTHEAD: printf("{\\s2 "); tagc=0; break; case ENDSECTHEAD: printf("}\\par"); /* useful extraction from files, environment? 
*/ printf("\n"); break; case BEGINSUBSECTHEAD:printf("{\\s3 "); break; case ENDSUBSECTHEAD: printf("}\\par\n"); break; case BEGINSUBSECTION: break; case ENDSUBSECTION: break; case BEGINLINE: /*if (ncnt) printf("\n\n");*/ break; case ENDLINE: I=0; tagc=0; /*putchar('\n'); CurLine++;*/ break; case SHORTLINE: if (!fIP) printf("\\line\n"); break; case BEGINBULPAIR: printf("{\\s4 "); break; case ENDBULPAIR: printf("}\\par\n"); break; case BEGINBULLET: break; case ENDBULLET: printf("\\tab "); fcharout=0; break; case BEGINBULTXT: fcharout=1; break; case ENDBULTXT: break; case CHARLQUOTE: printf("``"); break; case CHARRQUOTE: printf("''"); break; case CHARLSQUOTE: putchar('`'); break; case CHARRSQUOTE: putchar('\''); break; case CHARPERIOD: putchar('.'); break; case CHARDASH: putchar('-'); break; case CHARBACKSLASH: putchar('\\'); break; case CHARGT: putchar('>'); break; case CHARLT: putchar('<'); break; case CHARAMP: putchar('&'); break; case CHARBULLET: printf("\\bullet "); break; case CHARDAGGER: printf("\\dag "); break; case CHARPLUSMINUS: printf("\\pm "); break; case BEGINBOLD: printf("{\\b "); break; case BEGINSC: printf("{\\fs20 "); break; case BEGINITALICS: printf("{\\i "); break; case BEGINBOLDITALICS:printf("{\\b \\i "); break; case BEGINMANREF: printf("{\\f22 "); break; case ENDBOLD: case ENDSC: case ENDITALICS: case ENDBOLDITALICS: case ENDMANREF: putchar('}'); break; case BEGINY: case ENDY: case BEGINHEADER: case ENDHEADER: case BEGINFOOTER: case ENDFOOTER: case BEGINTABLE: case ENDTABLE: case BEGINTABLELINE: case ENDTABLELINE: case CHANGEBAR: /* nothing */ break; } } /*** Kong ***/ /* I hope the compiler has good common subexpression elimination for all the pointer arithmetic. 
*/ /* level 0: DOC - need match level 1: SECTION - need match level 2: SUBSECTION | BODY | BULLETPAIR level 3: BODY (within SUB) | BULLETPAIR (within SUB) | BULTXT (within BULLETPAIR) level 4: BULTXT (within BULLETPAIR within SUBSECTION) never see: SECTHEAD, SUBSECTHEAD, BULLET */ void pop(enum command cmd) { /* int i; int p; int match; p=cmdp-1; for (i=cmdp-1;i>=0; i--) if (cmd==cmdstack[i]) { match=i; break; } */ /* if match, pop off all up to and including match */ /* otherwise, pop off one level*/ if (Pbt) { (*fn)(ENDBULTXT); Pbt=0; } if (cmd==BEGINBULTXT) return; if (Pb && cmd==BEGINBULPAIR) { (*fn)(ENDBODY); Pb=0; } /* special */ if (Pbp) { (*fn)(ENDBULPAIR); Pbp=0; } if (cmd==BEGINBULPAIR) return; if (Pb) { (*fn)(ENDBODY); Pb=0; } if (cmd==BEGINBODY) return; if (Psub) { (*fn)(ENDSUBSECTION); Psub=0; } if (cmd==BEGINSUBSECTION) return; if (Psect) { (*fn)(ENDSECTION); Psect=0; } if (cmd==BEGINSECTION) return; } void poppush(enum command cmd) { pop(cmd); switch (cmd) { case BEGINBULTXT: Pbt=1; break; case BEGINBULPAIR: Pbp=1; break; case BEGINBODY: Pb=1; break; case BEGINSUBSECTION: Psub=1; break; case BEGINSECTION: Psect=1; break; default: fprintf(stderr, "poppush: unrecognized code %d\n", cmd); } (*fn)(cmd); prevcmd = cmd; } /* replace gets. handles hyphenation too */ char * la_gets(char *buf) { static char la_buf[BUFSIZ]; /* can lookahead a full line, but nobody does now */ static int fla=0, hy=0; char *ret,*p; int c,i; if (fla) { /* could avoid copying if callers used return value */ strcpy(buf,la_buf); fla=0; ret=buf; /* correct? */ } else { /*ret=gets(buf); -- gets is deprecated (since it can read too much?) */ /* could do this... ret=fgets(buf, BUFSIZ, stdin); buf[strlen(buf)-1]='\0'; ... but don't want to have to rescan line with strlen, so... 
*/ i=0; p=buf; /* recover spaces if re-linebreaking */ for ( ; hy; hy--, i++) *p++=' '; while ((c=getchar())!=EOF && c!='\n' && i++<BUFSIZ) *p++=c; /* very special case: if in SEE ALSO section, re-linebreak so references aren't linebroken (also do this if fNOHY flag is set) */ if (p>buf && p[-1]=='-' && (pmode || fSEEALSO || fNOHY) && isspace(ungetc(getchar(),stdin))) { p--; /* zap hyphen */ /* start getting next line, spaces first ... */ while ((c=getchar())!=EOF && isspace(c) && c!='\n') hy++; ungetc(c, stdin); /* ... append next nonspace string to previous ... */ while ((c=getchar())!=EOF && !isspace(c) && i++<BUFSIZ) *p++=c; ungetc(c, stdin); /* gobble following spaces (until, perhaps including, end of line) */ while ((c=getchar())!=EOF && isspace(c) && c!='\n') /* empty */; if (c=='\n') hy=0; else ungetc(c, stdin); } *p='\0'; ret=(c!=EOF)?buf:NULL; } lookahead=ungetc(getchar(), stdin); /* only looking ahead one character */ return ret; /* change this to line length? */ } /* buf[] == input text (read only) plain[] == output (initial, trailing spaces stripped; tabs=>spaces; underlines, overstrikes => tag array; spaces squeezed, if requested) ccnt = count of changebars scnt = count of initial spaces linelen = length result in plain[] */ int fHead=0; int fFoot=0; void filter() { enum command tagbeginend[][2] = { /* parallel to enum tagtype */ { -1,-1 }, { BEGINITALICS, ENDITALICS }, { BEGINBOLD, ENDBOLD }, { BEGINY, ENDY }, { BEGINSC, ENDSC }, { BEGINBOLDITALICS, ENDBOLDITALICS }, { BEGINMANREF, ENDMANREF } }; int curtag; char *p,*q,*r,*bp; char head[BUFSIZ]=""; /* first "word" */ char foot[BUFSIZ]=""; int header_m=0, footer_m=0; int headlen=0, footlen=0; int line=1-1; int i,j,k,l,off; int sect,subsect,bulpair,osubsect=0; int title=1; int oscnt=-1; int tt=-1; int empty=0,oempty; int fcont=0; int Pnew=0,I0; float s_avg=0.0; int spaceout; if (fMan) indent=-1; I=1; CurLine=1; (*fn)(BEGINDOC); I0=I; /* run through each line */ while (la_gets(buf)!=NULL) { line++; if 
(title) I=I0; filterline(buf,plain); /* ALL LINES ARE FILTERED */ fintable = fTable && ((!ncnt && fotable) || (ncnt && bs_cnt>=2 && bs_cnt<=5 && ((float) bs_sum / (float) bs_cnt)>3.0)); if (fintable) { if (!fotable) (*fn)(BEGINTABLE); } else if (fotable) { (*fn)(ENDTABLE); I=I0; tagc=0; filterline(buf,plain); /* rescan first line out of table */ } s_avg=(float) s_sum; if (s_cnt>=2) { /* don't count large second space gap */ if (scnt2) s_avg= (float) (s_sum - scnt2) / (float) (s_cnt-1); else s_avg= (float) (s_sum) / (float) (s_cnt); } p=plain; /* points to current character in plain */ /*** determine header and global indentation ***/ if (fMan && (!fHead || indent==-1)) { if (!linelen) continue; if (*header=='\0') { /* check for missing first header--but this doesn't catch subsequent pages */ if (strcmp(p,"NAME")==0 || strcmp(p,"Name")==0) { indent=scnt; /*filterline(buf,plain);*/ scnt=0; I=I0; fHead=1; } else { fHead=1; (*fn)(BEGINHEADER); /* grab header and its first word */ strcpy(header,p); /* if ((header_m=linelen-HEADFOOTSKIP)<0) header_m=0;*/ if ((header_m=HEADFOOTSKIP)>linelen) header_m=0; /*grabphrase(p);*/ strcpy(head,phrase); headlen=phraselen; la_gets(buf); line++; filterline(buf,plain); if (linelen) { strcpy(header2,plain); lowerline(plain,buf); if (strncmp(buf,"digital",7)==0 || strncmp(buf,"osf",3)==0) { fFoot=1; fSubsections=0; } } (*fn)(ENDHEADER); tagc=0; continue; } } else { /* some idiot pages have a *third* header line, possibly after a null line */ if (*header && scnt>MINMID) { strcpy(header3,p); ncnt=0; /*line++;*/ continue; } /* indent of first line ("NAME") after header sets global indent */ /* check '<' for Plan 9(?) */ if (*p!='<') { indent=scnt; I=I0; /*line++;*/ /*filterline(buf,plain);*/scnt=0; } else continue; } /* if (indent==-1) continue;*/ } if (!lindent && scnt) lindent=scnt; /*printf("lindent = %d, scnt=%d\n",lindent,scnt);*/ /**** for each ordinary line... 
*****/ /*** skip over global indentation */ oempty=empty; empty=(linelen==0); if (empty) {ncnt++; continue;} /*** strip out per-page titles ***/ if (fMan && (scnt==0 || scnt>MINMID)) { /*printf("***ncnt = %d, fFoot = %d, line = %d***", ncnt,fFoot,line);*/ if (!fFoot && !isspace(*p) && (scnt>5 || (*p!='-' && *p!='_')) && /* don't add ncnt -- line gets absolute line number */ (((ncnt>=2 && line/*+ncnt*/>=61/*was 58*/ && line/*+ncnt*/<70) || (ncnt>=4 && line/*+ncnt*/>=59 && line/*+ncnt*/<74) || (ncnt && line/*+ncnt*/>=61 && line/*+ncnt*/<=66)) && (/*lookahead!=' ' ||*/ (s_cnt>=1 && s_avg>1.1) || !falluc) ) ) { (*fn)(BEGINFOOTER); /* grab footer and its first word */ strcpy(footer,p); /* if ((footer_m=linelen-HEADFOOTSKIP)<0) footer_m=0;*/ if ((footer_m=HEADFOOTSKIP)>linelen) footer_m=0; /*grabphrase(p);*/ strcpy(foot,phrase); footlen=phraselen; footlen--; /* permit variations at end, as for SGI "Page N" */ la_gets(buf); line++; filterline(buf,plain); if (linelen) strcpy(footer2,plain); title=1; (*fn)(ENDFOOTER); tagc=0; /* if no header on first page, try again after first footer */ if (!fFoot && *header=='\0') fHead=0; /* this is dangerous */ fFoot=1; continue; } else /* a lot of work, but only for a few lines (about 4%) */ if (fFoot && (scnt==0 || scnt+indent>MINMID) && ( (headlen && strncmp(head,p,headlen)==0) || strcmp(header2,p)==0 || strcmp(header3,p)==0 || (footlen && strncmp(foot,p,footlen)==0) || strcmp(footer2,p)==0 /* try to recognize lines with dates and page numbers */ /* skip into line */ || (header_m && header_m<linelen && strncmp(&header[header_m],&p[header_m],HEADFOOTMAX)==0) || (footer_m && footer_m<linelen && strncmp(&footer[footer_m],&p[footer_m],HEADFOOTMAX)==0) /* skip into line allowing for off-by-one */ || (header_m && header_m<linelen && strncmp(&header[header_m],&p[header_m+1],HEADFOOTMAX)==0) || (footer_m && footer_m<linelen && strncmp(&footer[footer_m],&p[footer_m+1],HEADFOOTMAX)==0) /* or two */ || (header_m && header_m<linelen && 
strncmp(&header[header_m],&p[header_m+2],HEADFOOTMAX)==0) || (footer_m && footer_m<linelen && strncmp(&footer[footer_m],&p[footer_m+2],HEADFOOTMAX)==0) /* or with reflected odd and even pages */ || (headlen && headlen<linelen && strncmp(head,&p[linelen-headlen],headlen)==0) || (footlen && footlen<linelen && strncmp(foot,&p[linelen-footlen],footlen)==0) )) { tagc=0; title=1; continue; } /* page numbers at end of line */ for(i=0; p[i] && isdigit(p[i]); i++) /* empty */; if (&p[i]!=plain && !p[i]) {title=1; fFoot=1; continue;} } /*** interline spacing ***/ /* multiple \n: paragraph mode=>new paragraph, line mode=>blank lines */ /* need to chop up lines for Roff */ if (title) ncnt=(scnt!=oscnt || (/*scnt<4 &&*/ isupper(*p))); if (CurLine==1) {ncnt=0; tagc=0;} /* gobble all newlines before first text line */ (*fn)(BEGINLINE); if (/*pmode &&*/ ncnt) Pnew=1; title=0; /*ncnt=0;--moved down*/ if (fintable) (*fn)(BEGINTABLELINE); oscnt=scnt; fotable=fintable; if (pmode && !Pnew && (prevcmd==BEGINBODY || prevcmd==BEGINBULTXT)) { putchar(' '); I++; } /*** identify structural sections and notify fn */ if (fMan) { sect = (scnt==0 && isupper(*p)); subsect=(fSubsections && (scnt==2||scnt==3)); /* bulpair = (scnt<7 && (*p==c_bullet || *p=='-'));*/ /* decode the below */ bulpair = ((!auxindent || scnt!=lindent+auxindent) /*!bulpair*/ && ((scnt>=2 && scnt2>5) || scnt>=5 || (tagc>0 && tags[0].first==scnt) ) /* scnt>=2?? */ && (((*p==c_bullet || *p=='-' || *p=='.' || falluc) && (ncnt || scnt2>4)) || (scnt2-s_avg>=2 && phrase[phraselen-1]!='.') || (scnt2>3 && s_cnt==1) )); if (bulpair) { if (tagc>0 && tags[0].first==scnt) { k=tags[0].last; for (l=1; l<tagc; l++) { if (tags[l].first - k <=3) k=tags[l].last; else break; } phraselen=k-scnt; for (k=phraselen; plain[k]==' ' && k<linelen; k++) /* nothing */; if (k>=5 && k<linelen) hanging=k; else hanging=-1; } else if (scnt2) hanging=phraselen+scnt2; else hanging=5; } else hanging=0; /* hanging = bulpair? 
phraselen+scnt2 : 0;*/ /*if (bulpair) printf("hanging = %d\n",hanging);*/ /* maybe, bulpair=0 would be best */ } /* certain sections (subsections too?) special, like SEE ALSO */ /* to make canonical name as plain, all lowercase */ if (sect||subsect) { lowerline(plain,buf); fSEEALSO = (strcmp(buf,"see also")==0 || strcmp(buf,"related information")==0); fFILES = (strcmp(buf,"files")==0); } if (sect) { poppush(BEGINSECTION); (*fn)(BEGINSECTHEAD); addtoc(plain, BEGINSECTION, CurLine); } else if (subsect && !osubsect) { poppush(BEGINSUBSECTION); (*fn)(BEGINSUBSECTHEAD); addtoc(plain, BEGINSUBSECTION, CurLine); } else if (bulpair) { poppush(BEGINBULPAIR); (*fn)(BEGINBULLET); fIP=1; /*grabphrase(plain);*/ } else if (Pnew) { poppush(BEGINBODY); } Pnew=0; /* move change bars to left */ if (fChangeleft) { if (pmode) (*fn)(CHANGEBAR); else for (i=0; i<ccnt; i++) { xputchar('|'); /* (*fn)(CHANGEBAR); ?*/ } } /* show initial spaces */ if (!fIQS && fcharout) { spaceout = (scnt>ccnt)?(scnt-ccnt):0; if (fILQS) { if (spaceout>=lindent) spaceout-=lindent; else spaceout=0; } if (auxindent) { if (spaceout>=auxindent) spaceout-=auxindent; else spaceout=0; } printf("%*s",spaceout,""); } /*** iterate over each character in line, ***/ /*** handling underlining, tabbing, copyrights ***/ off=(!fIQS&&!pmode)?scnt:0; for (i=0, p=plain, curtag=0, fcont=0; *p; p++,i++,fcont=0) { /* interspersed presentation signals */ /* start tags in reverse order of addition (so structural first) */ if (curtag<tagc && i+I0+off==tags[curtag].first) { for (r=hitxt, j=tags[curtag].last-tags[curtag].first, hitxt[j]='\0'; j; j--) hitxt[j-1]=p[j-1]; (*fn)(tagbeginend[tags[curtag].type][0]); } /* special characters */ switch(*p) { case '"': if (p==plain || isspace(p[-1])) { (*fn)(CHARLQUOTE); fcont=1; } else if (isspace(p[1])) { (*fn)(CHARRQUOTE); fcont=1; } break; case '\'': if (p==plain || isspace(p[-1])) { (*fn)(CHARLSQUOTE); fcont=1; } else if (isspace(p[1])) { (*fn)(CHARRSQUOTE); fcont=1; } break; case '-': /* 
check for -opt => \-opt */ if (p==plain || (isspace(p[-1]) && !isspace(p[1]))) { (*fn)(CHARDASH); fcont=1; } break; case '\\': (*fn)(CHARBACKSLASH); fcont=1; break; case '<': (*fn)(CHARLT); fcont=1; break; case '>': (*fn)(CHARGT); fcont=1; break; case '&': (*fn)(CHARAMP); fcont=1; break; case c_dagger: (*fn)(CHARDAGGER); fcont=1; break; case c_bullet: (*fn)(CHARBULLET); fcont=1; break; case c_plusminus: (*fn)(CHARPLUSMINUS); fcont=1; break; case '.': (*fn)(CHARPERIOD); fcont=1; break; } /*default:*/ if (!fcont && fcharout) { if (strchr(escchars,*p)!=NULL) {putchar('\\');} putchar(*p); I++; } if (curtag<tagc && i+I0+off+1==tags[curtag].last) { (*fn)(tagbeginend[tags[curtag].type][1]); curtag++; } if (fIP && ((*p==' ' && i==phraselen) || *p=='\0')) { p++; /* needed but why? */ (*fn)(ENDBULLET); fIP=0; if (*p!='\0') { /*oscnt+=phraselen;*/ oscnt+=i; for (r=p; *r==' '; r++) { oscnt++; /* i++; if (fQS || !fcharout) p++; */ } } p--; /* increment in loop */ poppush(BEGINBULTXT); } } /*** end of line in buf[] ***/ /*** deal with section titles, hyperlinks ***/ if (sect) { (*fn)(ENDSECTHEAD); Pnew=1; } else if (subsect) { (*fn)(ENDSUBSECTHEAD); Pnew=1; } else if (fIP) { (*fn)(ENDBULLET); fIP=0; poppush(BEGINBULTXT); } /* oscnt not right here */ else if (scnt+linelen+spcsqz<MINRM /*&& ncnt*/ && lookahead!='\n' && prevcmd!=BEGINBULTXT && prevcmd!=ENDSUBSECTHEAD && prevcmd!=ENDSUBSECTHEAD) (*fn)(SHORTLINE); osubsect=subsect; if (fintable) (*fn)(ENDTABLELINE); /*if (!pmode)*/ (*fn)(ENDLINE); ncnt=0; I0=I; /* save I here in case skipping lines screws it up */ } /* wrap up at end */ pop(ENDDOC); (*fn)(ENDDOC); } int main(int argc, char *argv[]) { int c; int i; int fname=0; extern char *optarg; extern int optind, opterr; char lcfilter[BUFSIZ]; #ifdef macintosh extern void InitToolbox(); InitToolbox(); #endif fn=ASCII; /* default output format */ while ((c=getopt(argc,argv,"Kh?f:l:r:bckmTpvn:t:s:y"))!=-1) switch (c) { case 'k': fHeadfoot=1; break; case 'b': fSubsections=1; break; 
case 'c': fChangeleft=1; break; case 'n': strcpy(manName,optarg); fname=1; break; /* name & section for when using stdin */ case 's': strcpy(manSect,optarg); break; case 'l': manTitle = optarg; break; case 'r': manRef = optarg; break; case 't': TabStops=atoi(optarg); break; case 'm': fMan=0; break; case 'T': fTable=1; break; case 'p': pmode=1-pmode; break; case 'K': fFoot=1; break; case 'f': /* various filters */ /* make name lower case */ for (i=0; i<strlen(optarg); i++) lcfilter[i]=tolower(optarg[i]); lcfilter[i]='\0'; /* things a bit too irregular for a table of types */ if (strncmp(lcfilter,"tkman",UFP)==0) { fn=TkMan; pmode=fQS=fIQS=0; } else if (strncmp(lcfilter,"ascii",UFP)==0) { fn=ASCII; pmode=fQS=fIQS=0; } else if (strncmp(lcfilter,"roff",UFP)==0 || strncmp(lcfilter,"nroff",UFP)==0 || strncmp(lcfilter,"troff",UFP)==0) { fn=Roff; pmode=0; fNOHY=1; fChangeleft=1; fIQS=1; fQS=1; } else if (strncmp(lcfilter,"ensemble",UFP)==0) { fn=Ensemble; pmode=fChangeleft=fQS=fIQS=1; } else if (strncmp(lcfilter,"html",UFP)==0 || strncmp(lcfilter,"www",UFP)==0) { fn=HTML; pmode=1; fChangeleft=fQS=fIQS=1; } else if (strncmp(lcfilter,"sgml",UFP)==0) { fprintf(stderr, "Support for the Davenport DTD will be coming Real Soon Now.\n"); exit(0); /*fn=SGML; pmode=fChangeleft=fQS=fIQS=1;*/ } else if (strncmp(lcfilter,"sections",UFP)==0) { fn=Sections; fQS=fIQS=1; } else if (strncmp(lcfilter,"latex",UFP)==0) { fn=LaTeX; pmode=fQS=fIQS=1; } else if (strncmp(lcfilter,"rtf",UFP)==0) { fn=RTF; pmode=fQS=fIQS=1; } else if (strncmp(lcfilter,"pod",UFP)==0) { fn=pod; pmode=fIQS=fQS=0; fILQS=/*fQS=*/fChangezap=1; } else if (strncmp(lcfilter,"ps",UFP)==0 || strncmp(lcfilter,"postscript",UFP)==0) { fprintf(stderr, "%s: use groff or psroff to generate PostScript\n", argv[0]); exit(1); } else if (strncmp(lcfilter,"mif",UFP)==0 || strncmp(lcfilter,"framemaker",UFP)==0) { fprintf(stderr, "%s: FrameMaker has filters to convert from roff to MIF.\n", argv[0]); exit(1); } else { fprintf(stderr, "%s: 
unknown filter: %s\n", argv[0], optarg); exit(1); } break; case 'v': printf("RosettaMan v" ROSETTAMANVERSION "\n"); exit(0); break; case 'y': fNOHY=1; break; case 'h': case '?': fprintf(stderr, "rman [-f <ASCII|ROFF|TkMan|Ensemble|Sections|HTML|SGML|LaTeX|RTF|pod>]\n" " [-k(eep head/foot)] [-b(show subsections)] [-c(hangebarstoleft)]\n" " [-t(abstops) <number>] [-n(ame of man page) <string>] [-s(ection) <number>]\n" " [-m(an page aggressive parsing off)] [-T(able agressive parsing off)]\n" " [-v(ersion)] [-K (no page breaks)] [-p(aragraph mode) toggle]\n" " [-r <man ref printf string>] [-l <title printf string>] [-y (zap hyphens)]\n" " [<filename>]\n" ); exit(0); break; default: fprintf(stderr, "%s: unidentified option -%c (-h for help)\n",argv[0],c); exit(2); } /* read from given file name(s) */ if (optind<argc) { if (!fname) { /* if no name given, create from file name */ strcpy(manName,argv[optind]); /* search backward from end for final dot. split there */ for (i=strlen(manName); i>=0; i--) { if (manName[i]=='.') { strcpy(manSect,&manName[i+1]); manName[i]='\0'; break; } } } if (freopen(argv[optind], "r", stdin)==NULL) { fprintf(stderr, "%s: can't open %s\n", argv[0],argv[optind]); exit(1); } } /* minimal check for roff source: first character dot command or apostrophe comment */ lookahead = ungetc(getchar(), stdin); if (lookahead=='.' || lookahead=='\'') { fprintf(stderr, "%s:\tInput looks like [tn]roff source--RosettaMan needs formatted text\n" "\tfrom `nroff -man' or from */man/cat[1-8oln] directories.\n", argv[0]); exit(1); } filter(); return 0; }